home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PsL Monthly 1993 December
/
PSL Monthly Shareware CD-ROM (December 1993).iso
/
prgmming
/
dos
/
c
/
str.exe
/
MATCH.CPP
< prev
next >
Wrap
C/C++ Source or Header
|
1993-03-15
|
15KB
|
515 lines
/*
*
* Author: Allen I. Holub
*
* (c) C Gazette. May be used freely as long as author and publication are
* acknowledged
*
* Roy S. Woll (Revision 2.0)
* 1032 Summerplace Dr.
* San Jose, CA 95122
*
* ----------------------------------------------------------------------
*
*
* Revision 2.02 14 Mar 1993 ROY S. WOLL
*
* Fixed octal set definition to include first octal digit
*
* Revision 2.0 16 Nov 1992 ROY S. WOLL
*
* Initial revision for match.c -> match.cpp
* Compatibility with C++ syntax, and now member functions of regX.h
* to avoid polluting the name space.
* Fixed some inconsistencies. Regular expression compiled pattern should
* now grow as needed. Case insensitive support.
*
* Revision 1 27 Jan 1991 Allen I. Holub
*
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "regximp.h"
/* Return the greater of two pointers; y is returned unless x compares
 * strictly greater. */
inline const char * max(const char * x, const char * y)
{
    return (x > y) ? x : y;
}
/* Metacharacters in the input: */
/* These are the characters as they appear in the regular-expression
 * source text. Note that BOL and NCCL are the same character ('^'). */
#define BOL '^' /* start-of-line anchor */
#define EOL '$' /* end-of-line anchor */
#define ANY '.' /* matches any character */
#define CCL '[' /* start a character class */
#define CCLEND ']' /* end a character class */
#define NCCL '^' /* negates character class if 1st char. */
#define CLOSURE '*' /* Kleene closure (matches 0 or more) */
#define PCLOSE '+' /* Positive closure (1 or more) */
#define OPT '?' /* Optional closure (0 or 1) */
// Opcodes stored in the compiled pattern string in place of metacharacters.
// Each value is the corresponding input metacharacter with bit 0x80 set.
enum action {
    M_BOL    = '^' | 0x80,   // start-of-line anchor
    M_EOL    = '$' | 0x80,   // end-of-line anchor
    M_ANY    = '.' | 0x80,   // any character
    M_CCL    = '[' | 0x80,   // character class
    M_OPT    = '?' | 0x80,   // optional closure (0 or 1)
    M_CLOSE  = '*' | 0x80,   // Kleene closure (0 or more)
    M_PCLOSE = '+' | 0x80    // positive closure (1 or more)
};
/* Element type of a compiled pattern string. */
typedef unsigned char pattern; /* pattern strings are unsigned char */
/* Nonzero when bit 0x80 of x is set. Every enumerator of `action` has that
 * bit set, so this is how an action byte is told apart from other pattern
 * bytes. The result is the masked value (0 or 0x80), not 0 or 1. */
#define IS_ACTION(x) ((x)&0x80) /* true => element of pat. string is an */
/* action that represents a metacharacter */
/*----------------------------------------------------------------------*/
/* 16 bytes * 8 bits = 128 bits, one per 7-bit character code. */
#define MAPSIZE 16 /* need this many bytes for character class bit map */
/*
* Advance a pointer into the pattern template
* to the next pattern element. This is a +1 for
* all pattern elements but M_CCL, where you
* have to skip past both the M_CCL character and the
* MAPSIZE-byte bitmap that follows that character.
*
* The macro modifies pat in place (pat += ...) and evaluates it more than
* once, so pass a plain pointer variable with no side effects.
*/
#define ADVANCE(pat) (pat += (*pat == (pattern)M_CCL) ? (MAPSIZE+1) : 1)
//
// Bitmap functions. Set bit b in the map and
// test bit b to see if it was set previously.
//
// b is masked with 0x7f before the byte index is computed, so only the low
// seven bits select a bit: byte ((b) & 0x7f) >> 3, bit (b) & 0x07 within
// that byte. Two values of b that differ only in bit 7 share one map bit.
// TSTBIT yields the masked bit value (zero or nonzero), not 0 or 1.
// b is evaluated twice in each macro, so it must not have side effects.
//
#define SETBIT(b,map) ((map)[((b) & 0x7f) >>3] |= (1<< ((b) & 0x07)) )
#define TSTBIT(b,map) ((map)[((b) & 0x7f) >>3] & (1<< ((b) & 0x07)) )
int regXimp::omatch(const char ** strp, const pattern * pat,
                    const char * start)
{
    /*
     * Match one pattern element, pointed at by pat, against the character at
     * **strp. Closure is handled one level up by patcmp().
     *
     * strp   in/out: *strp is the current position in the subject string.
     *        It is advanced by one when the element consumes a character
     *        and left alone otherwise.
     * pat    the pattern element to try: an action code or a literal. For
     *        M_CCL the class bitmap starts at pat + 1.
     * start  the character at the left edge of the line. This might not be
     *        the same thing as *strp if the search is starting in the
     *        middle of the string.
     *
     * Returns 0 on failure and nonzero on success: 1 for a zero-width match
     * (the anchors) and 2 when one character was consumed.
     *
     * An end-of-line anchor matches '\n' or '\0'. The terminating '\0' is
     * never consumed: M_ANY and M_CCL refuse to match it, so *strp cannot
     * be moved past the end of the string.
     */
    int advance = -1;           // amount to advance *strp, -1 == no match

    switch (*pat) {
    case M_BOL:                 // at the first character of the line?
        if (*strp == start)     // compare positions, not characters
            advance = 0;
        break;

    case M_ANY:                 // . = anything but newline or end of string
        if (**strp != '\n' && **strp != '\0')
            advance = 1;
        break;

    case M_EOL:
        if (**strp == '\n' || **strp == '\0')
            advance = 0;
        break;

    case M_CCL:
        // TSTBIT looks only at the low seven bits of the character. Guard
        // against '\0' first so a class whose bit 0 is set cannot consume
        // the terminator.
        if (**strp != '\0' && TSTBIT(**strp, pat + 1))
            advance = 1;
        break;

    default:                    /* literal match */
        if (caseSensitive) {
            if (**strp == *pat)
                advance = 1;
        }
        // <ctype.h> functions need a value representable as unsigned char;
        // *pat already is one, **strp may be a negative plain char.
        else if (toupper((unsigned char)**strp) == toupper(*pat))
            advance = 1;
        break;
    }

    if (advance > 0)
        *strp += advance;
    return (advance + 1);
}
/* True when x is one of the characters '0'..'7'. x is evaluated twice. */
#define ISOCTDIGIT(x) ('0'<=(x) && (x)<='7')

static int hex2bin(int c)
{
    /* Convert the hex digit represented by 'c' to an int. 'c'
     * must be one of: 0123456789abcdefABCDEF
     */
    return (isdigit(c) ? (c)-'0': ((toupper(c))-'A')+10) & 0xf;
}

static int oct2bin(int c)
{
    /* Convert the octal digit represented by 'c' to an int. 'c'
     * must be a digit in the range '0'-'7'.
     */
    return ( ((c)-'0') & 0x7 );
}

/*------------------------------------------------------------*/
int esc(const char **s)
{
    /* Map escape sequences into their equivalent symbols. Return
     * the equivalent ASCII character. *s is advanced past the
     * escape sequence. If no escape sequence is present, the
     * current character is returned and the string is advanced by
     * one. The following are recognized (the letter after the
     * backslash may be in either case):
     *
     *  \b      backspace
     *  \f      formfeed
     *  \n      newline
     *  \r      carriage return
     *  \s      space
     *  \t      tab
     *  \e      ASCII ESC character ('\033')
     *  \DDD    number formed of 1-3 octal digits
     *  \xDDD   number formed of 1-3 hex digits
     *  \^C     C = any letter. Control code
     *
     * A backslash followed by any other character yields that character.
     *
     * *s is never moved beyond the terminating '\0':
     *   - called at the end of the string, returns '\0' without advancing;
     *   - a backslash that is the last character returns '\\' and leaves
     *     *s on the terminator;
     *   - "\^" that is the last thing in the string returns '^' and
     *     leaves *s on the terminator.
     */
    int rval;
    int digits;                 // digits consumed by \x... or \DDD

    if( **s != '\\' ) {
        rval = **s;
        if( rval != '\0' )      // do not step over the terminator
            ++(*s);
        return rval;
    }

    ++(*s);                     // Skip the '\'

    // <ctype.h> functions need a value representable as unsigned char.
    const unsigned char c = (unsigned char)**s;

    switch( toupper(c) ) {
    case '\0':                  // trailing backslash: stay on the '\0'
        return '\\';

    case 'B': rval = '\b'  ; break;
    case 'F': rval = '\f'  ; break;
    case 'N': rval = '\n'  ; break;
    case 'R': rval = '\r'  ; break;
    case 'S': rval = ' '   ; break;
    case 'T': rval = '\t'  ; break;
    case 'E': rval = '\033'; break;

    case '^':
        if( (*s)[1] == '\0' ) { // nothing follows: treat '^' as a literal
            rval = '^';
            break;
        }
        rval = (unsigned char)*++(*s);
        rval = toupper(rval) - '@';     // 'A' -> 1, 'B' -> 2, ...
        break;

    case 'X':
        rval = 0;
        ++(*s);                 // skip the 'x'
        for( digits = 0; digits < 3 && isxdigit((unsigned char)**s); ++digits )
            rval = (rval << 4) | hex2bin( *(*s)++ );
        --(*s);                 // compensate for the ++ after the switch
        break;

    default:
        if( !ISOCTDIGIT(**s) )
            rval = **s;         // unknown escape: the character itself
        else {
            rval = 0;
            for( digits = 0; digits < 3 && ISOCTDIGIT(**s); ++digits )
                rval = (rval << 3) | oct2bin( *(*s)++ );
            --(*s);             // compensate for the ++ after the switch
        }
        break;
    }

    ++(*s);                     // step past the last character of the escape
    return rval;
}
/*----------------------------------------------------------------------*/
const char * regXimp::doccl(const char * src)
{
/*
* Set bits in the map corresponding to characters specified in the src
* character class.
*/
int first, last, negative;
++src; // skip past the [
negative = (*src == NCCL);
if (negative) ++src; // check for negative ccl
const char * start = src; // start of characters in class
int len = compiledPattern.length();
compiledPattern.pad(len+MAPSIZE, str::right, char(0));
char * map = (char *)compiledPattern(len);
while (*src && *src != CCLEND) {
if (*src != '-') {